/********************************************************************************
	data_clean.do
	
	This file cleans and merges the raw data into the analysis dataset (infrastructure_use.dta) used in 
	The Political Resource Blessing or Curse?
	Patronage Networks, Infrastructure Investment, and
	Economic Development in China
	Author: Zhenhuan Lei
	Date:2022/08/25
	Program: Stata 17 MP
	Environment: Win 11
	
*********************************************************************************/

* Load the cleaned construction panel; the clear option discards whatever is
* currently in memory.
use data/construction_data_clean_V3.dta, clear

* Merge the business environment index (bindex1-bindex8)
	* Rows that exist only in the index file are discarded and no _merge
	* variable is left behind.
	* NOTE(review): m:m pairs rows by position within each city_code-year group;
	* if either side is unique on the key, 1:1 or m:1 would be safer - confirm.
	merge m:m city_code year using data/BIndex1718, ///
		keepusing(bindex1-bindex8) keep(master match) nogenerate
	
*Merge mayor and PS data*
	* The city key is renamed and multiplied by 100 before merging.
	* NOTE(review): presumably this matches the code format used in the leader
	* files - confirm.
	rename city_code cityID
	replace cityID = 100 * cityID

	* Mayor file: pull the career and connection fields, then give them a
	* mayor_ prefix so the identically named PS fields can be merged next.
	* NOTE(review): m:m pairs rows by position within each cityID-year group;
	* confirm whether 1:1 or m:1 is the intended relationship.
	merge m:m cityID year using data/citymayor_connection, ///
		keepusing(name promotion1 careerend connection_home connection_college connection_work connection_prom) ///
		keep(master match) nogenerate
	rename (name promotion1 careerend) (mayor_name mayor_prom mayor_end)
	rename (connection_home connection_college connection_work connection_prom) ///
		(mayor_conn_home mayor_conn_college mayor_conn_work mayor_conn_prom)

	* PS file: same fields plus three membership indicators.
	merge m:m cityID year using data/cityps_connection, ///
		keepusing(name promotion1 careerend connection_home connection_college ///
		connection_work connection_prom province_member province_vps central_member) ///
		keep(master match) nogenerate
	rename (name promotion1 careerend) (ps_name ps_prom ps_end)
	rename (connection_home connection_college connection_work connection_prom) ///
		(ps_conn_home ps_conn_college ps_conn_work ps_conn_prom)

	* System-missing membership indicators are recoded to 0.
	foreach flag of varlist province_member province_vps central_member {
		replace `flag' = 0 if `flag' == .
	}
	
*Merge mayor and PS connection from Jiang (2018)*
	* These two files key on cityid, so the key is renamed for the merges and
	* renamed back afterwards. Using-only rows are discarded in both merges.
	* NOTE(review): m:m pairs rows by position within each cityid-year group;
	* confirm whether 1:1 or m:1 is the intended relationship.
	rename cityID cityid

	merge m:m cityid year using data/citypanel_base.dta, ///
		keepusing(msec2currentsec - mayor_highest_edu) ///
		keep(master match) nogenerate

	merge m:m cityid year using data/econ_panel.dta, ///
		keepusing(gdpidx - mv3_growexp) ///
		keep(master match) nogenerate

	rename cityid cityID

	* First checkpoint of the working panel.
	save infrastructure.dta, replace
	
**Merge city leader data from Lei and Zhou (2022) replication data**
	* Re-key the source file: City_Code becomes a numeric cityID scaled by 100,
	* and the result is written to data/Subway_new.dta for the merge below.
	use data/Subway_clean_190702.dta, clear
	rename City_Code cityID
	destring cityID, replace
	replace cityID = 100 * cityID
	save data/Subway_new.dta, replace

	* Unlike the earlier merges, rows found only in the using file are kept.
	* NOTE(review): m:m pairs rows by position within each cityID-year group;
	* confirm whether 1:1 or m:1 is the intended relationship.
	use infrastructure.dta, clear
	merge m:m cityID year using data/Subway_new.dta, nogenerate

	* Discard one unused column and give the remaining ones English names.
	drop fdctzzezgdtz
	rename (nmzrk zrzzl GRP GRP_per GRP_growth gdtzze fdctzze) ///
		(pop pop_growth gdp gdp_per gdp_growth inv inv_re)
	rename (dwcyry syhgtcyry nmczsyrs dycycyry decycyry dscycyry) ///
		(employ employ_private unemploy employ_sec1 employ_sec2 employ_sec3)
	rename (Budget_income dfczysnzc jysyfzc kxsyfzc) ///
		(revenue expend expend_edu expend_rd)

	save infrastructure.dta, replace
	
*Merge city yearly statistical book*
	* The yearbook file keys on city_id at the unscaled level, so the key is
	* renamed and divided by 100 first.
	use infrastructure.dta, clear
	rename cityID city_id
	replace city_id = city_id/ 100

	* Using-only rows are kept here; rows coded 9999 are then removed.
	* NOTE(review): m:m pairs rows by position within each city_id-year group;
	* confirm whether 1:1 or m:1 is the intended relationship.
	merge m:m city_id year using data/citydes_20200718.dta, ///
		keepusing(population_hr_c-tele_d) nogenerate
	drop if city_id == 9999

	save infrastructure.dta, replace

*Merge exports and imports data*
	* Each trade file contributes a single variable named after the file;
	* using-only rows are discarded.
	rename city_id cityid
	foreach flow in export import {
		merge m:m cityid year using data/`flow'.dta, ///
			keepusing(`flow') keep(master match) nogenerate
	}

	save infrastructure.dta, replace
	
*Merge Jiang et al 2019 replication data on government reports*
	* Both files below key on cityid scaled by 100; the key is scaled up for the
	* merges and restored (and renamed to cityID) afterwards.
	* NOTE(review): m:m pairs rows by position within each cityid-year group;
	* confirm whether 1:1 or m:1 is the intended relationship.
	use infrastructure.dta, clear
	replace cityid = 100 * cityid
	merge m:m cityid year using data/Jiangdata.dta, ///
		keepusing(pv_G10_topic0- pv_G10_topic9 G10_topic0- G10_topic9 logprotest logcombinedprotest logprovcitymsg) ///
		keep(master match) nogenerate

*Merge gov bonds data*
	merge m:m cityid year using data/city_bond.dta, ///
		keepusing(num_bonds total_amount) ///
		keep(master match) nogenerate

	replace cityid = cityid / 100
	rename cityid cityID

	save infrastructure.dta, replace

*** Generate Treatment Group (Connection in 2009 is 1) ***
	* 2010 snapshot: keep only the rows observed in 2010 and store the mayor
	* and PS promotion-based connection flags under year-specific names.
	use infrastructure.dta, clear

	drop if year != 2010
	keep cityID mayor_conn_prom ps_conn_prom
	rename (mayor_conn_prom ps_conn_prom) (treat_mayor10 treat_ps10)
	label variable treat_mayor10 "Connected mayor (promotion def.) in 2010"
	label variable treat_ps10 "Connected PS (promotion def.) in 2010"
	save city_2010.dta, replace
	
	* 2009 snapshot: connection flags, export shock, average export dependence
	* over the three preceding lags, and province-level treated shares.
	use infrastructure.dta, clear
	xtset cityID year

	* Proportional year-on-year fall in exports; increases are floored at 0.
	gen export_shock = (L.export - export) / L.export
	replace export_shock = 0 if export_shock < 0

	* Mean of the export-to-GDP ratio at lags 2-4 (2007, 2006, 2005 for the
	* 2009 rows kept below).
	* NOTE(review): 7.09 and the *100 factor look like an exchange rate and a
	* unit conversion - confirm against the data documentation.
	gen export_dependence0507 = (L2.export * 7.09 * 100 / L2.gdp_c + L3.export * 7.09 * 100 / L3.gdp_c + L4.export * 7.09 * 100 / L4.gdp_c) / 3
	* Cap at 1. Stata treats missing as larger than any number, so without the
	* !missing() guard every missing value would be overwritten with 1.
	replace export_dependence0507 = 1 if export_dependence0507 > 1 & !missing(export_dependence0507)

	keep if year == 2009
	keep cityID pro_code mayor_conn_prom ps_conn_prom export_shock export_dependence0507
	rename mayor_conn_prom treat_mayor09
	rename ps_conn_prom treat_ps09
	label var treat_mayor09 "Connected mayor (promotion def.) in 2009"
	label var treat_ps09 "Connected PS (promotion def.) in 2009"

	* Share of treated cities in each province: treat_share includes the city
	* itself, treat_share2 leaves it out (missing when the province has a
	* single city, because the denominator is then 0).
	gen treat09 = (treat_mayor09 == 1 | treat_ps09 == 1)
	gen city = 1 if pro_code != .
	sort pro_code
	by pro_code: egen pro_treated = total(treat09)
	by pro_code: egen pro_city = total(city)
	gen treat_share = pro_treated / pro_city
	gen treat_share2 = (pro_treated - treat09) / (pro_city - 1)

	* treat09 is rebuilt later from the merged flags, so it is not saved here.
	drop treat09

	save city_2009.dta, replace

	* 2008 and 2007 snapshots: for each year, keep that year's rows and store
	* the mayor and PS promotion-based connection flags under names carrying
	* the two-digit year suffix.
	foreach yr in 2008 2007 {
		local yy = substr("`yr'", 3, 2)

		use infrastructure.dta, clear
		keep if year == `yr'
		keep cityID mayor_conn_prom ps_conn_prom
		rename (mayor_conn_prom ps_conn_prom) (treat_mayor`yy' treat_ps`yy')
		label variable treat_mayor`yy' "Connected mayor (promotion def.) in `yr'"
		label variable treat_ps`yy' "Connected PS (promotion def.) in `yr'"
		save city_`yr'.dta, replace
	}
	
	* Attach the four yearly city snapshots to every city-year row. Only
	* cities that appear in a snapshot survive each merge, so the result
	* contains the cities present in all of 2007-2010.
	use infrastructure.dta, clear
	foreach snap in 2010 2009 2008 2007 {
		* Each snapshot should hold one row per city, so this is a many-to-one
		* merge. m:1 gives the same result as m:m when that holds and stops
		* with an error if a snapshot has duplicate cityIDs, instead of
		* silently pairing rows by position.
		merge m:1 cityID using city_`snap', keep(match) nogenerate
	}
	save infrastructure_use.dta, replace

	* The intermediate panel is no longer needed.
	erase infrastructure.dta
	
		
*** Create variables ***

	use infrastructure_use.dta, clear
	xtset cityID year
	set matsize 11000

	* Period indicators: 1 when year is at or beyond the cutoff.
	gen after = (year >= 2009)
	gen after2 = (year >= 2008)
	gen after3 = (year >= 2006)

	* Treated in a given year = mayor or PS flagged as connected in that year.
	foreach yy in 07 08 09 10 {
		gen treat`yy' = (treat_mayor`yy' == 1 | treat_ps`yy' == 1)
	}

	* Contemporaneous (time-varying) connection indicator.
	gen connected = (mayor_conn_prom == 1 | ps_conn_prom == 1)

	* Population, road investment, and fiscal measures.
	gen pop_mi = population_hr_c/100
	gen lroad_inv = ln(road_bridge + 1)
	gen lroad_inv_per = ln(road_bridge / population_hr_c + 1)
	gen road_inv_gdp = road_bridge / gdp_c * 100
	gen road_inv_per = road_bridge / population_hr_c
	gen lpop = ln(population_hr_c)
	gen lgdp_per = ln(gdp_percapita_c)
	gen lrev_per = ln(publicfinance_income_c / population_hr_c)
	gen lexp_per = ln(publicfinance_expenditure_c / population_hr_c)
	gen deficit = -(publicfinance_income_c - publicfinance_expenditure_c) / gdp_c * 100
	gen unemp = population_unemployed_c / (population_hr_c * 100)

	* A primary-sector share above 100 is treated as invalid and set to missing.
	replace gdp_primaryshare_c = . if gdp_primaryshare_c > 100

	* Per-capita GDP split by sector share (levels first, then logs).
	local sector = 0
	foreach share in gdp_primaryshare_c gdp_secondary_c gdp_tertiary_c {
		local ++sector
		gen gdpper_sec`sector' = gdp_percapita_c * `share' / 100
	}
	local sector = 0
	foreach share in gdp_primaryshare_c gdp_secondary_c gdp_tertiary_c {
		local ++sector
		gen lgdpper_sec`sector' = ln(gdp_percapita_c * `share' / 100)
	}

	* Loans and deposits per capita.
	gen lloan_per = ln(loans_national_c / population_hr_c)
	gen ldeposit_per = ln(deposit_national_c / population_hr_c)
	gen loan_per = loans_national_c / population_hr_c

	* Average output per firm by ownership type (levels first, then logs).
	foreach own in domestic hmt foreign {
		gen `own'_ave = output_`own'_c / firm_`own'_c
	}
	foreach own in domestic hmt foreign {
		gen l`own'_ave = ln(output_`own'_c / firm_`own'_c)
	}

	* Investment, financing sources, and remaining per-capita measures.
	gen fixinv_per = investment_fixed_c / population_hr_c
	* NOTE(review): the scaling constants below look like unit conversions
	* between the bond and GDP series - confirm the units.
	gen bond_gdp = (total_amount * 100 * (1000)^2 / (gdp_c * 10000)) * 100
	gen domloan_gdp = dom_loan / gdp_c * 100
	gen center_gdp = central_fund / gdp_c * 100
	gen local_gdp = local_fund / gdp_c * 100
	gen rev_per = publicfinance_income_c / population_hr_c
	gen exp_per = publicfinance_expenditure_c / population_hr_c
	gen unemp_rate = unemploy / (population_hr_c * 100) * 100

	* Province-by-year identifier.
	gen pro_year = pro_code * 10000 + year
	
	*Mayor and CPS's education and age in 2008 and 2009
		* For every (new stub, source variable) pair, copy the 2009 and then
		* the 2008 value of the source onto all rows of the same city. The
		* by-prefix relies on the data already being sorted by cityID.
		foreach pair in "ps_age PS_age" "mayor_age Mayor_age" "ps_edu PS_c_edu" "mayor_edu Mayor_c_edu" {
			gettoken stub src : pair
			foreach yy in 09 08 {
				* Holds the source value on that year's rows only.
				tempvar snap
				gen `snap' = `src' if year == 20`yy'
				by cityID: egen `stub'`yy' = min(`snap')
				drop `snap'
			}
		}
	
	*Mayor and CPS's race and gender*
	* One indicator variable per category of each source variable.
	tabulate Mayor_c_sex, generate(mayor_gender)
	tabulate Mayor_c_ethnicity, generate(mayor_race)
	tabulate PS_c_sex, generate(PS_gender)
	tabulate PS_c_ethnicity, generate(PS_race)

	*Topics for gov reports
	* Aggregate the ten G10 topic shares into four broader groups.
	gen G10_infrastructure = G10_topic0 + G10_topic1
	gen G10_welfare = G10_topic2 + G10_topic9
	gen G10_development = G10_topic3 + G10_topic4 + G10_topic7 + G10_topic8
	gen G10_politics = G10_topic5 + G10_topic6

	*Dummies for vice-province cities*
	* NOTE(review): if cityID follows the standard 4-digit prefecture codes,
	* 4303 may be a typo for 4403, and 2101 is not in the list - confirm.
	capture drop fsj
	gen fsj = inlist(cityID, 4401, 4201, 2301, 5101, 3201, 6101, 2201, ///
		3701, 3301, 2102, 3702, 4303, 3502, 3302)
	
	*Control variables in 2003*
	* For each control: c03_  = its 2003 value copied to all rows of the city,
	*                   c03t_ = that value times year,
	*                   c03a_ = that value times after2,
	*                   c03y_<year> = that value times a single-year indicator.
	global cont03 pop_mi population_gr_c unemp gdp_growth_c gdp_percapita_c gdp_primaryshare_c gdp_secondary_c gdp_tertiary_c rev_per exp_per fixinv_per 
	foreach base of varlist $cont03 {
		tempvar snap03
		gen `snap03' = `base' if year == 2003
		by cityID: egen c03_`base' = min(`snap03')
		drop `snap03'

		gen c03t_`base' = c03_`base' * year
		*gen c03t2_`base'= c03_`base' * year^2
		*gen c03t3_`base'= c03_`base' * year^3
		gen c03a_`base' = c03_`base' * after2

		forvalues yr = 2003/2016 {
			gen c03y_`base'`yr' = c03_`base' * `yr'.year
		}
	}

	*Control variables in 2007
	* c07_ = the control's 2007 value copied to all rows of the city.
	global cont07 lpop population_gr_c unemp gdp_growth_c lgdp_per gdp_primaryshare_c gdp_secondary_c gdp_tertiary_c lrev_per lexp_per deficit

	foreach base of varlist $cont07 {
		tempvar snap07
		gen `snap07' = `base' if year == 2007
		by cityID: egen c07_`base' = min(`snap07')
		drop `snap07'
	}
	
	*Placebo treatments
	* For each year 2003-2016: treat<year> is the city's connection status in
	* that year (mayor or PS, defined only when both flags are non-missing),
	* copied to all rows of the city; placebo<year> interacts it with after2.
	forvalues yr = 2003/2016 {
		gen tempconn_`yr' = (mayor_conn_prom | ps_conn_prom) ///
			if year == `yr' & !missing(mayor_conn_prom, ps_conn_prom)
		by cityID: egen treat`yr' = min(tempconn_`yr')
		gen placebo`yr' = treat`yr' * after2
	}
	drop tempconn*
	
	* New leaders in 2008 or 2009
	* A leader is flagged as new when the leader index differs from the
	* previous year's; the flag is left missing for years up to 2003.
	capture drop new_msec new_mayor
	gen new_msec = (PS_leaderindex != L.PS_leaderindex) if year > 2003
	gen new_mayor = (Mayor_leaderindex != L.Mayor_leaderindex) if year > 2003

	* Copy the 2009 and 2008 flags to all rows of the city, then combine them.
	* NOTE(review): the logical | treats a missing operand as true, so a city
	* lacking a 2008 or 2009 value would be coded 1 - confirm this is intended.
	foreach role in msec mayor {
		foreach yy in 09 08 {
			tempvar snap
			gen `snap' = new_`role' if year == 20`yy'
			by cityID: egen new_`role'`yy' = min(`snap')
			drop `snap'
		}
		gen new_`role'0809 = (new_`role'09 | new_`role'08)
	}

	* Any new city leader in 2008-09, and its interaction with after2.
	gen new_cl0809 = (new_msec0809 | new_mayor0809)
	gen newcl_treat = new_cl0809 * after2

	* Indicator for a fixed list of province codes, and its interaction with after2.
	gen new_psec0809 = inlist(pro_code, 13, 21, 23, 32, 34, 36, 37, 42, 44, 45, 51)
	gen newpsec_treat = new_psec0809 * after2
	 	
	*Create primary explanatory variables
		* Treatment status in 2009 (and 2007 / 2010 as placebos) interacted
		* with the after2 period indicator.
		gen main_treat = treat09 * after2
		gen placebo07 = treat07 * after2
		gen placebo10 = treat10 * after2

		capture drop main_treat2 main_treat3
		* main_treat2: 0 = none of the cases below, 1 = PS only, 2 = mayor only,
		* 3 = both connected in 2009. The three conditions are mutually
		* exclusive, so at most one term is non-zero.
		gen main_treat2 = 1 * (treat_ps09 == 1 & treat_mayor09 == 0) ///
			+ 2 * (treat_ps09 == 0 & treat_mayor09 == 1) ///
			+ 3 * (treat_ps09 == 1 & treat_mayor09 == 1)
		* main_treat3: 0 = treated in neither year, 1 = treated in 2008 only,
		* 2 = treated in 2009 only, 3 = treated in both.
		gen main_treat3 = 1 * (treat08 == 1 & treat09 == 0) ///
			+ 2 * (treat08 == 0 & treat09 == 1) ///
			+ 3 * (treat08 == 1 & treat09 == 1)

		* Separate PS and mayor treatment interactions.
		gen ps_treat09 = treat_ps09 * after2
		gen mayor_treat09 = treat_mayor09 * after2

	*Saving data
	* Rows with a system-missing province code are excluded from the final file.
	keep if pro_code != .
	save infrastructure_use.dta, replace
